{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "CAM_colab.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "include_colab_link": true
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.8"
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/mlelarge/dataflowr/blob/master/CEA_EDF_INRIA/CAM_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "EZBwDI7xiVu5"
      },
      "source": [
        "# Class Activation Map\n",
        "\n",
        "\n",
        "http://cnnlocalization.csail.mit.edu/\n",
        "\n",
        "\n",
        "https://github.com/metalbubble/CAM"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "DNUU0eixiVvB",
        "colab": {}
      },
      "source": [
        "import io\n",
        "import requests\n",
        "from PIL import Image\n",
        "import torch\n",
        "from torchvision import models, transforms\n",
        "from torch.autograd import Variable\n",
        "from torch.nn import functional as F\n",
        "import numpy as np\n",
        "import cv2\n",
        "import pdb\n",
        "from matplotlib.pyplot import imshow\n",
        "\n",
        "# input image\n",
        "LABELS_URL = 'https://s3.amazonaws.com/outcome-blog/imagenet/labels.json'\n",
        "IMG_URL = 'http://media.mlive.com/news_impact/photo/9933031-large.jpg'"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "cMgoA17fiVvT",
        "colab": {}
      },
      "source": [
        "net = models.resnet18(pretrained=True)\n",
        "finalconv_name = 'layer4'"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "bc6rDo7SiVvk",
        "colab": {}
      },
      "source": [
        "!pip install torchviz #git+https://github.com/szagoruyko/pytorchviz\n",
        "from torchviz import make_dot"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "xtavlPdDiVvz",
        "colab": {}
      },
      "source": [
        "x = torch.randn(1, 3, 224, 224).requires_grad_(True)\n",
        "y = net(x)\n",
        "make_dot(y, params=dict(list(net.named_parameters()) + [('x', x)]))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "t6zoBApLiVwD",
        "colab": {}
      },
      "source": [
        "net.eval()\n",
        "\n",
        "# hook the feature extractor\n",
        "# see https://pytorch.org/tutorials/beginner/former_torchies/nn_tutorial.html\n",
        "# for more explanations\n",
        "features_blobs = []\n",
        "def hook_feature(module, input, output):\n",
        "    print('Inside ' + module.__class__.__name__ + ' forward')\n",
        "    print('')\n",
        "    print('input: ', type(input))\n",
        "    print('input[0]: ', type(input[0]))\n",
        "    print('output: ', type(output))\n",
        "    print('')\n",
        "    print('input size:', input[0].size())\n",
        "    print('output size:', output.data.size())\n",
        "    features_blobs.append(output.data.cpu().numpy())\n",
        "\n",
        "net._modules.get(finalconv_name).register_forward_hook(hook_feature);"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "_p4QcCbfiVwO",
        "colab": {}
      },
      "source": [
        "net._modules.get(finalconv_name)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "d_5ItJNciVwe",
        "colab": {}
      },
      "source": [
        "# get the softmax weight\n",
        "params = list(net.parameters())\n",
        "weight_softmax = np.squeeze(params[-2].data.numpy())"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "RQ1Ap4EmiVwm"
      },
      "source": [
        "the last parameter is the bias"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "sBCJX2Y7iVwp",
        "colab": {}
      },
      "source": [
        "params[-1].size()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "q8S8vLosiVw4"
      },
      "source": [
        "and the before last parameter is the last matrix from the fc layer"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "0KixaI68iVw8",
        "colab": {}
      },
      "source": [
        "weight_softmax.shape"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "Eu2RytB0iVxJ"
      },
      "source": [
        "In Resnet architecture, we do average pooling before the last fc layer.\n",
        "\n",
        "<img src ='https://github.com/mlelarge/dataflowr/blob/master/CEA_EDF_INRIA/img/CAM.png?raw=1'>\n",
        "\n",
        "source Zhou et al."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "quQMxt1-iVxL",
        "colab": {}
      },
      "source": [
        "def returnCAM(feature_conv, weight_softmax, class_idx):\n",
        "    # generate the class activation maps upsample to 256x256\n",
        "    size_upsample = (256, 256)\n",
        "    bz, nc, h, w = feature_conv.shape\n",
        "    output_cam = []\n",
        "    for idx in class_idx:\n",
        "        cam = weight_softmax[idx].dot(feature_conv.reshape((nc, h*w)))\n",
        "        cam = cam.reshape(h, w)\n",
        "        cam = cam - np.min(cam)\n",
        "        cam_img = cam / np.max(cam)\n",
        "        cam_img = np.uint8(255 * cam_img)\n",
        "        output_cam.append(cv2.resize(cam_img, size_upsample))\n",
        "    return output_cam\n",
        "\n",
        "\n",
        "normalize = transforms.Normalize(\n",
        "   mean=[0.485, 0.456, 0.406],\n",
        "   std=[0.229, 0.224, 0.225]\n",
        ")\n",
        "preprocess = transforms.Compose([\n",
        "   transforms.Resize((224,224)),\n",
        "   transforms.ToTensor(),\n",
        "   normalize\n",
        "])\n",
        "\n",
        "response = requests.get(IMG_URL)\n",
        "img_pil = Image.open(io.BytesIO(response.content))\n",
        "img_pil.save('test.jpg')\n",
        "imshow(img_pil);"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "NPO1QO43iVxU",
        "colab": {}
      },
      "source": [
        "img_tensor = preprocess(img_pil)\n",
        "img_variable = Variable(img_tensor.unsqueeze(0))\n",
        "logit = net(img_variable)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "815LeSeUiVxd"
      },
      "source": [
        "Resnet architectures:\n",
        "<img src='https://github.com/mlelarge/dataflowr/blob/master/CEA_EDF_INRIA/img/resnet.png?raw=1'>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "xsf9lConiVxg",
        "colab": {}
      },
      "source": [
        "# download the imagenet category list\n",
        "classes = {int(key):value for (key, value)\n",
        "          in requests.get(LABELS_URL).json().items()}\n",
        "\n",
        "h_x = F.softmax(logit, dim=1).data.squeeze()\n",
        "probs, idx = h_x.sort(0, True)\n",
        "probs = probs.numpy()\n",
        "idx = idx.numpy()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "xa_B5bQuiVxq",
        "colab": {}
      },
      "source": [
        "# output the prediction\n",
        "for i in range(0, 5):\n",
        "    print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "s25z8IRHiVx7",
        "colab": {}
      },
      "source": [
        "# generate class activation mapping for the top1 prediction\n",
        "CAMs = returnCAM(features_blobs[0], weight_softmax, [idx[0]])\n",
        "\n",
        "# render the CAM and output\n",
        "print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])\n",
        "img = cv2.imread('test.jpg')\n",
        "height, width, _ = img.shape\n",
        "heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)\n",
        "result = heatmap * 0.3 + img * 0.5\n",
        "cv2.imwrite('CAM.jpg', result)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "3_gSTzumiVyS",
        "colab": {}
      },
      "source": [
        "from IPython.display import Image\n",
        "Image(\"CAM.jpg\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "a9eAJWl8iVyf",
        "colab": {}
      },
      "source": [
        "# generate class activation mapping for the top1 prediction\n",
        "CAM1s = returnCAM(features_blobs[0], weight_softmax, [idx[2]])\n",
        "\n",
        "# render the CAM and output\n",
        "print('output CAM1.jpg for the top3 prediction: %s'%classes[idx[2]])\n",
        "img = cv2.imread('test.jpg')\n",
        "height, width, _ = img.shape\n",
        "heatmap = cv2.applyColorMap(cv2.resize(CAM1s[0],(width, height)), cv2.COLORMAP_JET)\n",
        "result = heatmap * 0.3 + img * 0.5\n",
        "cv2.imwrite('CAM1.jpg', result)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "Lm0KdthciVy5",
        "colab": {}
      },
      "source": [
        "Image(\"CAM1.jpg\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "4pgd_jZwjO1w",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}